clear all
set more off
set mem 10000000
set matsize 10000
version 15

********************************************************************************************
*** RDROBUST reduced-form, Economic Census outcomes: splits on RGGVY treatment intensity ***
********************************************************************************************

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

****************************************************************** 
****************************************************************** 

{
use "$panel/panel_dataset_full.dta", clear

	// Bring in RGGVY district-level administrative data (10th Plan only)
preserve
use "$rggvy/rggvy_district_progress_X_XI_processed.dta", clear
egen dt_group = group(st_code dt_code)
egen temp_group = group(plan implement_type)
unique st_code, by(temp_group) gen(uniq_st)
unique dt_group, by(temp_group) gen(uniq_dt)
unique dpr_code, by(temp_group) gen(uniq_dpr)
unique st_code if plan==10
unique st_code if plan==11
unique dt_group if plan==10
unique dt_group if plan==11
count if award_date<sanction_date
replace award_date = sanction_date if award_date<sanction_date
replace award_date = max(award_date,17553) if plan==11
keep if plan==10
collapse (min) min_award_date=award_date (max) max_award_date=award_date ///
	(sum) award_cost total_released achiev_UDE achiev_ELEC achiev_BPL ///
	, by(st_code dt_code)	
gen med_award_date = round((min_award_date+max_award_date)/2,1)
format %td med_award_date	
tempfile admin
save `admin'
restore	
merge m:1 st_code dt_code using `admin' 

	// RGGVY admin splitter 1: (# villages treated) / (# villages in district)
egen temp1 = count(vi_code) if tot_p>=300, by(st_code dt_code)
egen temp2 = mode(temp1), by(st_code dt_code)
gen RGGVY_share_villages_300 = (achiev_UDE + achiev_ELEC) / temp2
egen temp3 = count(vi_code), by(st_code dt_code)
gen RGGVY_share_villages_all = (achiev_UDE + achiev_ELEC) / temp3
egen temp_tag = tag(st_code dt_code) 
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag
tab state if temp_tag & RGGVY_share_villages_300>1.4 & RGGVY_share_villages_all!=. & corr_state==1
gen RGGVY_rule_breaker = RGGVY_share_villages_300>1.4
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0
sum RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0, detail
sum RGGVY_share_villages_300 if temp_tag & RGGVY_rule_breaker==0, detail
// split on 60% of villages in district
drop temp*

	// RGGVY admin splitter 2: (# BPL HHs treated) / (# village HHs in district)
egen temp1 = sum(no_hh11) if tot_p>=300, by(st_code dt_code)
egen temp2 = mode(temp1), by(st_code dt_code)
gen RGGVY_share_hh_300 = (achiev_BPL) / temp2
egen temp3 = sum(no_hh11), by(st_code dt_code)
gen RGGVY_share_hh_all = (achiev_BPL) / temp3
egen temp_tag = tag(st_code dt_code) 
twoway scatter RGGVY_share_hh_300 RGGVY_share_hh_all if temp_tag 
twoway scatter RGGVY_share_hh_300 RGGVY_share_hh_all if temp_tag & RGGVY_rule_breaker==0
sum RGGVY_share_hh_all if temp_tag & RGGVY_rule_breaker==0, detail
sum RGGVY_share_hh_300 if temp_tag & RGGVY_rule_breaker==0, detail
// split on 10% of HHs in district
drop temp*

	// RGGVY admin splitter 3: (Rs allocated) / (# villages treated)
egen temp_tag = tag(st_code dt_code) 
gen RGGVY_lakh_per_v = (total_released) /  (achiev_UDE + achiev_ELEC)
sum RGGVY_lakh_per_v if temp_tag & RGGVY_rule_breaker==0, detail
// split on 10 lakh per village
drop temp*

	// Keep only districts with high treatment intensity (by village)
keep if RGGVY_rule_breaker==0 & RGGVY_share_villages_all>=0.6

preserve	
use "$panel/ec_shrid_pc01_panel.dta", clear	
tab count_dup_vill_ec, missing
drop if count_dup_vill_ec==1
tab count_dup_shrid, missing
drop if count_dup_shrid!=0
tab flag_ec
drop if flag_ec==1
keep pca01_id ec_*
reshape wide ec_employees ec_firms, i(pca01_id) j(ec_year)
tempfile ec
save `ec'
restore
merge 1:1 pca01_id using `ec', nogen keep(3)


	// Keep villages in RD sample
gen in_fs_sample = vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
keep if in_fs_sample==1
	
	// Create state FEs (since RD robust doesn't let you pass them through)
drop if st_code==32 // Kerala, only 3 villages
tab st_code if in_fs_sample==1, gen(STFEfs)	
drop STFEfs1 // to avoid collinearity

	// Create district FEs (since RD robust doesn't let you pass them through)
gen stdtFE = stdt
tab stdtFE state     if inlist(stdtFE,62,63,64,70,91,175,180,181,202,308,309,320,490,493,494,495,504,505,508)	
egen temp = count(stdt) if inrange(tot_p,250,350) & ec_firms2013!=. & ec_firms2005!=., by(stdtFE)
egen temp2 = mean(temp), by(stdtFE)
replace stdtFE = 999 if temp2<15 | temp2==.
drop temp*

tab stdtFE if in_fs_sample==1 & inrange(tot_p,250,350)

	// one catch-all district FE for districts with so few in-sample villages that they break rdrobust
tab stdtFE if in_fs_sample==1, gen(DTFEfs)	
drop DTFEfs1 // to avoid collinearity

	// Create block groups (for clustering)
egen stdtbk = group(stdt bk_code)

	// Create lights-difference variable, to identify crazy outliers
gen lights_diff = abs(lights_max2011_hat - lights_max2001_hat)


	// Create macros for lists of outcome variables 
global yvars = "ec_firms2013 ec_employees2013 "


	// Create variables to store regresson results
gen rf_step = .
gen yvar = ""
gen ifs = ""
gen controls_base = ""
gen control = ""
gen fe = ""
gen kernel = ""
gen bwmethod = ""
gen vce = ""
gen polynomial_order = .
gen beta_conv = .
gen beta_robust = .
gen se_conv = .
gen se_robust = .
gen pval_conv = .
gen pval_robust = .
gen lci_conv = .
gen uci_conv = .
gen lci_robust = .
gen uci_robust = .
gen bw_lo = .
gen bw_hi = .
gen nobs_orig = .
gen nobs_left = .
gen nobs_right = .
gen nobs_total = .
gen ndist = . 
gen ymean = .
gen ftag = ""
gen dprtag = ""
global h_wide = 200


	// Massive loop over RDROBUST outcomes and sensitivities
foreach y in $yvars {

	// Prep to store results
	foreach v of varlist rf_step-ftag {
		cap replace `v' = ""
		cap replace `v' = .
	}
	local row = 0

	// Define outcome-specific stuff
	global yvar = "`y'"
	global control = subinstr("$yvar","2013","2005",1)
	do "$path_code/analyze/RDROBUST_rf_outcomes_graphspecs.do"

	//Loop through sensitivities
	foreach rf_step in 1 4 5 11 {

		// Reset RDROBUST defaults
		local ifs = "in_fs_sample==1 & pop_mismatch20==0 & lights_diff<20 & RGGVY_rule_breaker==0 & RGGVY_share_villages_all>=0.6"
		local controls_base = "lights_max1998 lights_max1999 lights_max2000 lights_max2001 lights_max2002 lights_max2003 lights_max2004 lights_max2005" 
		local fe = "STFEfs*"
		local kernel = "tri"
		local bwmethod = "mserd"
		local vce = ""
		local poly = 1
		local graphtag = "fs"

		// Define step-specific RDROBUST settings
		if inlist(`rf_step',1) {
			local folder = "RDROBUST plots rf outcomes preferred"
			local ftag = "RF outcomes, preferred"
			local title2 = ""
		}
		if inlist(`rf_step',2) {
			local folder = "RDROBUST plots rf outcomes popmismatch"
			local ftag = "RF outcomes, popmismatch"
			local title2 = "(population mismatches)"
			local ifs = "in_fs_sample==1 & lights_diff<20"
		}
		if inlist(`rf_step',3) {
			local folder = "RDROBUST plots rf outcomes lights outliers"
			local ftag = "RF outcomes, lights outliers"
			local title2 = "(lights outliers)"
			local ifs = "in_fs_sample==1 & pop_mismatch20==0"
		}
		if inlist(`rf_step',4) {
			local folder = "RDROBUST plots rf outcomes epa kernel"
			local ftag = "RF outcomes, epa kernel"
			local title2 = "(Epanechnikov kernel)"
			local kernel = "epa"
		}
		if inlist(`rf_step',5) {
			local folder = "RDROBUST plots rf outcomes uni kernel"
			local ftag = "RF outcomes, uni kernel"
			local title2 = "(uniform kernel)"
			local kernel = "uni"
		}
		if inlist(`rf_step',6) {
			local folder = "RDROBUST plots rf outcomes no lights controls"
			local ftag = "RF outcomes, no lights controls"
			local title2 = "(no lights controls)"
			local controls_base = ""
		}
		if inlist(`rf_step',7) {
			local folder = "RDROBUST plots rf outcomes no FEs"
			local ftag = "RF outcomes, no FEs"
			local title2 = "(no fixed effects)"
			local fe = ""
		}
		if inlist(`rf_step',8) {
			local folder = "RDROBUST plots rf outcomes district FEs"
			local ftag = "RF outcomes, district FEs"
			local title2 = "(district fixed effects)"
			local fe = "DTFEfs*"
		}
		if inlist(`rf_step',9) {
			local folder = "RDROBUST plots rf outcomes nncluster by district"
			local ftag = "RF outcomes, nncluster by district"
			local title2 = "(nncluster by district)"
			local vce = "vce(nncluster stdt)"
		}
		if inlist(`rf_step',10) {
			local folder = "RDROBUST plots rf outcomes cluster by district"
			local ftag = "RF outcomes, cluster by district"
			local title2 = "(cluster by district)"
			local vce = "vce(cluster stdt)"
		}
		if inlist(`rf_step',11) {
			local folder = "RDROBUST plots rf outcomes CERRD bandwidth"
			local ftag = "RF outcomes, CERRD bandwidth"
			local title2 = "(CERRD bandwidth)"
			local bwmethod = "cerrd"
		}

		local dprtag = "hi_vill"
		
		// Run first-stage regresssion
		di "rdrobust $yvar tot_p if `ifs', c(299.5) covs(`controls_base' $control `fe') kernel(`kernel') bwselect(`bwmethod') p(`poly') all `vce'"
		rdrobust $yvar tot_p if `ifs', c(299.5) covs(`controls_base' $control `fe') kernel(`kernel') bwselect(`bwmethod') p(`poly') all `vce'

		// Generate in-sample indicator and store bandwidths
		cap drop temp_in_reg
		qui gen temp_in_reg = `ifs' & inrange(tot_p,299.5-e(h_l),299.5+e(h_r)) & ${yvar}!=.
		local h_l = e(h_l)
		local h_r = e(h_r)

		// Store results
		local row = `row' + 1
		qui replace rf_step = `rf_step' in `row'
		qui replace yvar = "$yvar" in `row'
		qui replace ifs = "`ifs'" in `row'
		qui replace controls_base = "`controls_base'" in `row'
		qui replace control = "$control" in `row'
		qui replace fe = "`fe'" in `row'
		qui replace kernel = e(kernel) in `row'
		qui replace bwmethod = e(bwselect) in `row'
		qui replace vce = "`vce'" in `row'
		qui replace polynomial_order = e(p) in `row'
		qui replace beta_conv = e(tau_cl) in `row'
		qui replace beta_robust = e(tau_bc) in `row'
		qui replace se_conv = e(se_tau_cl) in `row'
		qui replace se_robust = e(se_tau_rb) in `row'
		qui replace pval_conv = e(pv_cl) in `row'
		qui replace pval_robust = e(pv_rb) in `row'
		qui replace lci_conv = e(ci_l_cl) in `row'
		qui replace uci_conv = e(ci_r_cl) in `row'
		qui replace lci_robust = e(ci_l_rb) in `row'
		qui replace uci_robust = e(ci_r_rb) in `row'
		qui replace bw_lo = e(h_l) in `row'
		qui replace bw_hi = e(h_r) in `row'
		qui replace nobs_orig = e(N) in `row'
		qui replace nobs_left = e(N_h_l) in `row'
		qui replace nobs_right = e(N_h_r) in `row'
		qui replace nobs_total = e(N_h_l) + e(N_h_r) in `row'
		qui unique stdt if temp_in_reg==1
		qui replace ndist = r(unique) in `row'
		qui sum $yvar if temp_in_reg==1 & tot_p<299.5
		qui replace ymean = r(mean) in `row'
		qui replace ftag = "`ftag'" in `row'
		qui replace dprtag = "`dprtag'" in `row'

		// Residualize $yvar for regression sample
		qui reg $yvar `controls_base' $control `fe' if temp_in_reg==1
		cap drop y_resid_sample
		predict y_resid_sample, residuals
		
		// Residualize $yvar for [100,500] bandwidth
		qui reg $yvar `controls_base' $control `fe' if `ifs' & inrange(tot_p,299.5-${h_wide},299.5+${h_wide})
		cap drop y_resid_full
		predict y_resid_full, residuals
		
		// RD plots for regression sample (variable bandwidth) 
		foreach bins in 10 20 40 {
			if `h_l'<=75 {
				local xlab = "250 275 300 325 350"
			}
			else if inrange(`h_l',76,125) {
				local xlab = "200 250 300 350 400"
			}
			else if inrange(`h_l',126,175) {
				local xlab = "150 200 250 300 350 400 450"
			}
			else {
				local xlab = ""
			}
			rdplot y_resid_sample tot_p if temp_in_reg==1, ///
				c(299.5) p(`poly') nbins(`bins' `bins') kernel(`kernel') h(`h_l' `h_r') ///
				graph_options( ///
				title("$title" "`title2'", color(black) size(large)) ///
				ytitle("$ytitle", size(medlarge)) ///
				xtitle("2001 village population", size(medlarge)) ///
				ylabel(,nogrid angle(0) labsize(medlarge)) ///
				xlabel(`xlab', labsize(medlarge)) ///
				graphregion(color(white)) plotregion(fcolor(white)) graphregion(lcolor(white)) ///
				legend(off))
			graph export "$results/`folder'/${yvar}_`graphtag'_`dprtag'_inreg_`bins'.pdf", replace
		}

		if !inlist(`rf_step',11) {
			// RD plots for regression sample (constant bandwidth) 
			foreach bins in 20 40 {
				rdplot y_resid_full tot_p if `ifs' & inrange(tot_p,299.5-${h_wide},299.5+${h_wide}), ///
					c(299.5) p(`poly') nbins(`bins' `bins') kernel(`kernel') h(${h_wide} ${h_wide}) ///
					graph_options( ///
					title("$title" "`title2'", color(black) size(large)) ///
					ytitle("$ytitle", size(medlarge)) ///
					xtitle("2001 village population", size(medlarge)) ///
					ylabel(,nogrid angle(0) labsize(medlarge)) ///
					xlabel(, labsize(medlarge)) ///
					graphregion(color(white)) plotregion(fcolor(white)) graphregion(lcolor(white)) ///
					legend(off))
				graph export "$results/`folder'/${yvar}_`graphtag'_`dprtag'_wide_`bins'.pdf", replace
			}
		}
	}

	// Save results
	preserve
	keep rf_step-dprtag
	dropmiss, obs force
	if "$yvar"!="ec_firms2013" {
		tempfile reg_results
		save `reg_results'
		clear
		append using "$results/RDROBUST_outcomes_rf_ec_dpr.dta" `reg_results'
	}
	duplicates drop
	compress
	save "$results/RDROBUST_outcomes_rf_ec_dpr.dta", replace
	restore
	
}



}

****************************************************************** 
****************************************************************** 

